package cn.edu360t.Tags
import org.apache.commons.lang3.StringUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.Row
/**
  * Tag builder that turns the `keywords` field of a row into keyword tags.
  *
  * Every accepted keyword produces one entry of the form `"K" + keyword -> 1`.
  */
object Tags4KeyWords extends Tags{

  /**
    * Builds the keyword tags for a single row.
    *
    * @param args positional arguments: `args(0)` is cast to a [[Row]] and is read through
    *             its `keywords` string field; `args(1)` is cast to a broadcast stop-word
    *             dictionary whose keys are the words to discard.
    * @return a map from `"K" + keyword` to `1` for every `|`-separated keyword that is
    *         3 to 8 characters long and is not a key of the stop-word dictionary;
    *         an empty map when the `keywords` field is null or empty.
    */
  override def makeTags(args: Any*): Map[String, Int] = {
    val row = args(0).asInstanceOf[Row]
    val stopdict = args(1).asInstanceOf[Broadcast[Map[String, Null]]]

    // Raw keyword field; individual keywords are separated by '|'.
    val keywords = row.getAs[String]("keywords")

    if (StringUtils.isNotEmpty(keywords)) {
      // Read the broadcast value once instead of once per keyword.
      val stopWords = stopdict.value

      keywords.split("\\|")
        // Keep only words of an acceptable length that are not stop words.
        .filter(word => word.length >= 3 && word.length <= 8 && !stopWords.contains(word))
        .map(word => ("K" + word) -> 1)
        .toMap
    } else {
      Map.empty[String, Int]
    }
  }
}